package com.dslcode.spider.webmagic.yachang;

import com.dslcode.spider.webmagic.yachang.db.ArtWork;
import com.google.common.collect.Lists;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Html;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;
import java.util.stream.Collectors;

/**
 * WebMagic {@link PageProcessor} for the Artron auction search pages.
 *
 * <p>Pages whose URL matches {@link #LIST_PAGE_URL_REGEX} are handled as result lists:
 * the last pager link is queued, every list item is parsed into an {@link ArtWork},
 * the items' detail URLs are queued, and the parsed list is published under the
 * result field {@code "artWorkList"}. Every other page is handled as a detail page
 * and published under the result field {@code "artWork"}.
 *
 * <p>Created by dongsilin on 2017/6/1.
 */
@Slf4j
@Component
public class ArtWorkPageProcessor implements PageProcessor {

    /**
     * Regex used to recognise list pages.
     * NOTE(review): the trailing {@code *} only quantifies the final {@code n}; it is kept
     * unchanged so the set of URLs treated as list pages stays exactly the same.
     */
    private static final String LIST_PAGE_URL_REGEX = "https://artso\\.artron\\.net/auction/search_auction*";

    /** Marker that precedes the third-party artwork id inside a detail URL. */
    private static final String THIRD_ID_MARKER = "-art";

    /** Prefix prepended to the currency icon paths found on list pages. */
    private static final String CURRENCY_IMG_BASE_URL = "http://artso.artron.net/auction/";

    // NOTE(review): the value below embeds what look like session/auth cookies of a real
    // account; it should be moved to external configuration and rotated.
    // NOTE(review): addCookie registers ONE cookie literally named "Cookie" whose value is the
    // whole string; presumably a raw "Cookie" request header (Site#addHeader) was intended -
    // confirm before changing, since the current form may be what the crawl relies on.
    private final Site site = Site.me()
        .setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.57 Safari/537.36")
        .setRetryTimes(2)
        .addCookie("Cookie", "pre_showmark=1; ALLYESID4=1164BC0A6F3E15D3; gr_user_id=09dcfdd6-c2a6-4b66-9952-33c417e4fa5d; artron_67ae_saltkey=UgiT054t; "
            + "artron_67ae_lastvisit=1557731446; artron_67ae_visitedfid=6; artron_67ae_viewid=tid_4871267; artron_67ae_auth=dcfaVchrpcldLFcbf8M27rnuEQicOxQf7cDZG5%2BL%2B3oyf7Wg%2BsW%2BUhAO0%2FVzccI8dAbDFJVUForMBKnPiM6GIei0KhLa; artron_reward_log=daylogin%2C1171931; artron_auth=75c2OqBVKIzbmLJXUVUeA2aCJ7h8mIg7a280efv8mEl01fA9k3MWBSkXpxMRa%2FIMHxVWEJvd5NB2IIrS72LE%2FHLtQj2h; artron_loginuser=%E6%9D%8E%E5%BB%B7%E8%B1%B9144; artron_67ae_ulastactivity=bdf4vheBVhSdwwjFkUjKvisrBnHHBdlTlJFuao4xTd3HRON%2F66Xr; artron_67ae_smile=1D1; _at_pt_0_=2548811; _at_pt_1_=%E6%9D%8E%E5%BB%B7%E8%B1%B9144; _at_pt_2_=cfdaa6a647a18a9ba6004ec8a7722d52; artron_track_ebiz_referer=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3D18bKCWFf7sZaVhG1TEPw8QE3fcb2uwdZLC0ljYYFWi63GgZ_naYeGs-WD0wPly0MrSXOZ7sa-ioVncTnppsTba%26wd%3D%26eqid%3D8db079a00022537d000000035ce898a5; Hm_lvt_851619594aa1d1fb8c108cde832cc127=1556846637,1556846751,1558747437,1558747562; Hm_lpvt_851619594aa1d1fb8c108cde832cc127=1558769649");

    /**
     * Dispatches a downloaded page to the list or the detail handler, depending on its URL,
     * and stores the parsed result on the page.
     *
     * @param page the downloaded page
     */
    @Override
    public void process(Page page) {
        Selectable url = page.getUrl();
        if (url.regex(LIST_PAGE_URL_REGEX).match()) {
            getPageUrlList(page);
            page.putField("artWorkList", getListData(page));
        } else {
            page.putField("artWork", getDetailData(page));
        }
    }

    /**
     * Pagination handling: queues the URL taken from the last {@code .listJump>a} link
     * of the list page, if the pager has any links.
     *
     * @param page the list page
     */
    private void getPageUrlList(Page page) {
        Html html = page.getHtml();
        List<String> pagerLinks = html.$(".listJump>a", "href").all();
        if (CollectionUtils.isEmpty(pagerLinks)) {
            return;
        }
        // The last pager link is treated as the "next page" link.
        String nextPage = pagerLinks.get(pagerLinks.size() - 1);
        log.info("netxPage = {}", nextPage);
        page.addTargetRequests(Lists.newArrayList(nextPage));
    }

    /**
     * List data handling: parses every item of the list page and queues the items'
     * detail pages for crawling.
     *
     * @param page the list page
     * @return one {@link ArtWork} per list item, in document order
     */
    private List<ArtWork> getListData(Page page) {
        Html html = page.getHtml();
        // Sequential on purpose: the node list is a single page of results, so a parallel
        // stream buys nothing here.
        List<ArtWork> artWorkList = html.$(".listImg>ul>li").nodes().stream()
            // <li class="clear"> entries are not artwork items.
            .filter(node -> !StringUtils.equals("clear", node.$("li", "class").get()))
            .map(ArtWorkPageProcessor::parseListItem)
            .collect(Collectors.toList());

        // Continue crawling the detail pages; items without a link are not queued.
        page.addTargetRequests(artWorkList.stream()
            .map(ArtWork::getHttpUrl)
            .filter(StringUtils::isNotBlank)
            .collect(Collectors.toList()));

        return artWorkList;
    }

    /**
     * Detail page data handling: extracts the artwork id from the page URL and the big
     * image URL from the page body.
     *
     * @param page the detail page
     * @return an {@link ArtWork} carrying only the third-party id and the big image URL;
     *         either value is {@code null} when it cannot be found
     */
    private ArtWork getDetailData(Page page) {
        String thirdId = extractThirdId(page.getUrl().get());
        String bigImagePath = page.getHtml().$(".picSmallList .smallCell>img", "data-big").get();
        // The attribute value is prefixed with the scheme; a missing attribute must stay
        // null rather than become the literal "https:null".
        String bigImageUrl = StringUtils.isNotEmpty(bigImagePath) ? "https:" + bigImagePath : null;
        return new ArtWork().setThirdId(thirdId).setBigImageUrl(bigImageUrl);
    }

    /** Builds an {@link ArtWork} from a single list item node. */
    private static ArtWork parseListItem(Selectable node) {
        String httpUrl = node.$(".imgWrap>a", "href").get();
        String thirdId = extractThirdId(httpUrl);

        String evaluatePriceCurrencyImg = node.xpath("//p[3]/em/img/@src").get();
        String dealPriceCurrencyImg = node.xpath("//p[4]/em/img/@src").get();
        // The deal price is spread over two <i> elements.
        String dealPrice = joinNullable(
            node.xpath("//p[4]/em/i[1]/text()").get(),
            node.xpath("//p[4]/em/i[2]/text()").get());

        // Without an id the thumbnail URL would contain the literal "null".
        String miniImageUrl = StringUtils.isNotEmpty(thirdId)
            ? "https://auction1-img.artimg.net/getpic_auctionlist_ipad.php?picid=art" + thirdId + "&w=185&h=185"
            : null;

        return new ArtWork()
            .setThirdId(thirdId)
            .setLotNum(node.xpath("//p[2]/em/text()").get())
            .setTitle(node.xpath("//h3/a/text()").get())
            .setHttpUrl(httpUrl)
            .setEvaluatePrice(node.xpath("//p[3]/em/i[1]/text()").get())
            .setEvaluatePriceCurrency(currencyCode(evaluatePriceCurrencyImg))
            .setEvaluatePriceCurrencyImg(absoluteCurrencyImg(evaluatePriceCurrencyImg))
            .setDealPrice(dealPrice)
            .setDealPriceCurrency(currencyCode(dealPriceCurrencyImg))
            .setDealPriceCurrencyImg(absoluteCurrencyImg(dealPriceCurrencyImg))
            .setAuctionCompanyName(node.xpath("//p[5]/em/a/text()").get())
            .setAuctionCompanyUrl(node.xpath("//p[5]/em/a/@href").get())
            .setAuctionDate(node.xpath("//p[6]/em/i[1]/text()").get())
            .setMiniImageUrl(miniImageUrl);
    }

    /**
     * Extracts the id that follows {@code "-art"} in a detail URL, without a trailing slash.
     *
     * @return the id, or {@code null} when the URL is {@code null} or has no {@code "-art"} marker
     */
    private static String extractThirdId(String httpUrl) {
        // StringUtils.indexOf is null-safe and returns -1 for a null URL.
        int markerIdx = StringUtils.indexOf(httpUrl, THIRD_ID_MARKER);
        if (markerIdx < 0) {
            return null;
        }
        String tail = httpUrl.substring(markerIdx + THIRD_ID_MARKER.length());
        // Only strip a trailing slash if there is one, so ids are never truncated.
        return StringUtils.removeEnd(tail, "/");
    }

    /** Returns the currency icon's file name without directory and extension, or {@code null} if there is no icon. */
    private static String currencyCode(String currencyImg) {
        if (StringUtils.isEmpty(currencyImg)) {
            return null;
        }
        return StringUtils.substring(currencyImg, currencyImg.lastIndexOf("/") + 1, currencyImg.lastIndexOf("."));
    }

    /** Prefixes the currency icon path with the site base URL, or returns {@code null} if there is no icon. */
    private static String absoluteCurrencyImg(String currencyImg) {
        return StringUtils.isNotEmpty(currencyImg) ? CURRENCY_IMG_BASE_URL.concat(currencyImg) : null;
    }

    /** Concatenates two possibly-null strings, treating {@code null} as empty; {@code null} if both are {@code null}. */
    private static String joinNullable(String first, String second) {
        if (first == null && second == null) {
            return null;
        }
        return (first == null ? "" : first) + (second == null ? "" : second);
    }

    /**
     * @return the crawl settings (user agent, retry count, cookie) used by this processor
     */
    @Override
    public Site getSite() {
        return this.site;
    }

}
